import logging
from init import PATHS
LOGGER = logging.getLogger(__name__)
import pandas as pd
import os



def main():
    """Write name and industry lookups for the auto-industry subsidiaries.

    Loads ``subsidiaries.csv`` from the dropbox output folder, reduces its
    'Subsidiary BvD ID' column to distinct values, and saves what
    ``get_names`` and ``get_industry`` return for those IDs as two CSV files
    (written without the index) in the same folder.
    """
    LOGGER.info('BEGIN')

    source_csv = PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries.csv'
    names_csv = PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries_names.csv'
    industry_csv = PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries_industry.csv'

    listing = pd.read_csv(source_csv)
    # De-duplicate through a set; the resulting order of IDs is arbitrary.
    bvd_ids = list(set(listing['Subsidiary BvD ID']))

    # The names file is written before the industry lookup starts.
    get_names(bvd_ids).to_csv(names_csv, index=False)
    get_industry(bvd_ids).to_csv(industry_csv, index=False)


def get_names(bvdid_list):
    """Collect the name records of the given BvD IDs from the Orbis CSV files.

    Every file found in
    ``PATHS.orbis / 'Descriptive/BvD_ID_and_Name/by_country/csv/'`` is read as
    tab-separated text in chunks, and only the rows whose 'BvD ID number' is
    contained in ``bvdid_list`` are kept.

    Args:
        bvdid_list: Collection of BvD ID values to keep; it is passed to
            ``Series.isin`` for every chunk.

    Returns:
        pandas.DataFrame: The matching rows of all files, concatenated in the
        order in which ``os.listdir`` returned the files.

    Raises:
        ValueError: Raised by ``pd.concat`` when it is given nothing to
            concatenate (e.g. the directory contains no files).
    """
    source_dir = PATHS.orbis / 'Descriptive/BvD_ID_and_Name/by_country/csv/'
    files = os.listdir(source_dir)
    subsidiary_names = []
    for i, file in enumerate(files):
        # The directory is named "by_country", so each file is presumably one
        # country's extract -- the log label relies on that.
        LOGGER.info('Country: {}. {} / {}'.format(file, i, len(files)))
        # Join the file name with `/`, not `+`: `/` binds tighter than `+`,
        # so `dir_path / 'csv/' + file` would try to add a str to a path
        # object, which raises TypeError for pathlib-style paths.
        reader = pd.read_csv(
            source_dir / file,
            sep='\t',
            quoting=3,  # csv.QUOTE_NONE: quote characters are ordinary data
            chunksize=2000000,
        )
        # Filter chunk by chunk so only the matching rows are retained.
        matches = [chunk[chunk['BvD ID number'].isin(bvdid_list)] for chunk in reader]
        subsidiary_names.append(pd.concat(matches))
    return pd.concat(subsidiary_names)


def get_industry(bvdid_list):
    """Gather the industry-classification rows of the given BvD IDs.

    Each file in
    ``PATHS.orbis / 'Descriptive/Industry_classifications/by_country/csv/'``
    is read as tab-separated text in chunks, loading only the BvD ID column
    and the US SIC / NAICS primary code and description columns. Rows whose
    'BvD ID number' is not in ``bvdid_list`` are dropped.

    Args:
        bvdid_list: Collection of BvD ID values to keep; it is passed to
            ``Series.isin`` for every chunk.

    Returns:
        pandas.DataFrame: The matching rows of all files, concatenated in the
        order in which ``os.listdir`` returned the files.
    """
    wanted_columns = [
        'BvD ID number',
        'US SIC, Primary code(s)',
        'US SIC primary code, text description',
        'NAICS, Primary code(s)',
        'NAICS, Primary code(s), text description',
    ]
    csv_dir = PATHS.orbis / 'Descriptive/Industry_classifications/by_country/csv/'
    filenames = os.listdir(csv_dir)
    total = len(filenames)

    per_file = []
    for position, filename in enumerate(filenames):
        LOGGER.info('Country: {}. {} / {}'.format(filename, position, total))
        reader = pd.read_csv(
            csv_dir / filename,
            sep='\t',
            quoting=3,  # csv.QUOTE_NONE: quote characters are ordinary data
            chunksize=2000000,
            usecols=wanted_columns,
        )
        # Keep only the requested IDs from each chunk, then merge the file's
        # chunks into one frame before moving on to the next file.
        kept = [piece[piece['BvD ID number'].isin(bvdid_list)] for piece in reader]
        per_file.append(pd.concat(kept))

    return pd.concat(per_file)

